Exercise 1

Students from the Bachelor degree in Physics performed an experiment to study the Zeeman effect. The apparatus contains a Ne source lamp whose position can be changed. During the setting up of the apparatus, the source position has to be adjusted in order to maximize the intensity of the detected light signal. The following table gives the position of the source (in mm) and the corresponding height of the peak (arbitrary units) for the wavelength under study:

\(x_i\) = 2.44, 3.49, 3.78, 3.31, 3.18, 3.15, 3.1, 3.0, 3.6, 3.4

\(y_i\) = 129, 464, 189, 562, 589, 598, 606, 562, 360, 494

Assume a quadratic dependence of the peak height, \(y_i\) as a function of the source position \(x_i\),

\[ f(x)=c_0 + c_1x + c_2x^2 \]

All the measured values are affected by a Gaussian noise with zero mean, such that

\[ y_i = f(x_i) + \epsilon \]

where \(\epsilon\) follows a normal distribution with mean \(\mu = 0\) and unknown standard deviation, \(\sigma\).

  1. Build a Markov Chain Monte Carlo to estimate the best parameters of the quadratic dependence of the data and the noise that affects the measured data.
# This code is a modified version of the code that can be found at https://github.com/ehalley/PBI/tree/master/PBI_scripts

library(mvtnorm)

# Metropolis (MCMC) algorithm to sample from function func.
# The first argument of func must be a real vector of parameters, 
# the initial values of which are provided by the real vector thetaInit.
# func() returns a two-element vector, the logPrior and logLike 
# (log base 10), the sum of which is taken to be the log of the density 
# function (i.e. unnormalized posterior). The MCMC sampling PDF is the 
# multivariate Gaussian with fixed covariance, sampleCov. A total of 
# Nburnin+Nsamp samples are drawn, of which the last Nsamp are kept. 
# ... is used to pass data, prior parameters etc. to func().
# If demo=FALSE (default), then
# return a Nsamp * (2+Ntheta) matrix (no names), where the columns are
# 1:  log10 prior PDF
# 2:  log10 likelihood
# 3+: Ntheta parameters
# (The order of the parameters in thetaInit and sampleCov must match.)
# If demo=TRUE, return the above (funcSamp) as well as thetaPropAll, a 
# Nsamp * Ntheta matrix of proposed steps, as a two element named list.
metrop <- function(func, thetaInit, Nburnin, Nsamp, sampleCov, verbose, 
                   demo=FALSE, ...) {

  Ntheta   <- length(thetaInit)
  thetaCur <- thetaInit
  funcCur  <- func(thetaInit, ...) # c(log10 prior, log10 likelihood)
  # funcSamp holds the kept (post burn-in) samples: columns are
  # (log10 prior, log10 likelihood, theta[1..Ntheta])
  funcSamp <- matrix(data=NA, nrow=Nsamp, ncol=2+Ntheta) 
  nAccept  <- 0
  acceptRate <- 0
  
  if(demo) {
    thetaPropAll <- matrix(data=NA, nrow=Nsamp, ncol=Ntheta)
  }
  
  for(n in 1:(Nburnin+Nsamp)) {

    # Draw a proposal from a Gaussian centred on the current state.
    # No Hastings factor is needed because the proposal is symmetric.
    if(is.null(dim(sampleCov))) { # theta and sampleCov are scalars
      thetaProp <- rnorm(n=1, mean=thetaCur, sd=sqrt(sampleCov))
    } else {
      thetaProp <- rmvnorm(n=1, mean=thetaCur, sigma=sampleCov, 
                           method="eigen")
    }
    funcProp  <- func(thetaProp, ...) 
    logMR <- sum(funcProp) - sum(funcCur) # log10 of the Metropolis ratio
    # Accept with probability min(1, 10^logMR)
    if(logMR>=0 || logMR>log10(runif(1, min=0, max=1))) {
      thetaCur   <- thetaProp
      funcCur    <- funcProp
      nAccept    <- nAccept + 1
    }
    # BUG FIX: recompute the acceptance rate on every iteration. The
    # original only updated it inside the acceptance branch, so the
    # reported rate was stale (too high) after rejected proposals.
    acceptRate <- nAccept/n
    if(n>Nburnin) { # keep samples only after the burn-in phase
      funcSamp[n-Nburnin,1:2] <- funcCur
      funcSamp[n-Nburnin,3:(2+Ntheta)] <- thetaCur
      if(demo) {
        thetaPropAll[n-Nburnin,1:Ntheta] <- thetaProp
      }
    }
    # Progress report every 'verbose' iterations (verbose=Inf disables it)
    if( is.finite(verbose) && (n%%verbose==0 || n==Nburnin+Nsamp) ) {
      s1 <- noquote(formatC(n,          format="d", digits=5, flag=""))
      s2 <- noquote(formatC(Nburnin,    format="g", digits=5, flag=""))
      s3 <- noquote(formatC(Nsamp,      format="g", digits=5, flag=""))
      s4 <- noquote(formatC(acceptRate, format="f", digits=4, width=7, 
                            flag=""))
      cat(s1, "of", s2, "+", s3, s4, "\n")
    }
  }
  if(demo) {
    return(list(funcSamp=funcSamp, thetaPropAll=thetaPropAll))
  } else {
    return(funcSamp)
  }
}

##### Functions to provide evaluations of prior, likelihood and posterior for the
##### quadratic model, plus sampling from the prior

# theta is vector of parameters; obsdata is 2 column dataframe with names [x,y].

# Return c(log10(prior), log10(likelihood)) (each generally unnormalized) of the quadratic model
# Evaluate the unnormalized log posterior of the quadratic model.
# Returns c(log10 prior, log10 likelihood); when the prior has zero
# density the (possibly expensive) likelihood is skipped and
# c(-Inf, -Inf) is returned instead.
logpost.quadraticmodel <- function(theta, obsdata) {
  lp <- logprior.quadraticmodel(theta)
  if(!is.finite(lp)) {
    cat("logprior is not finite!")
    return( c(-Inf, -Inf) )
  }
  c(lp, loglike.quadraticmodel(theta, obsdata))
}

# Return log10(likelihood) for parameters theta and obsdata
# dnorm(..., log=TRUE) returns log base e, so multiply by 1/ln(10) = 0.4342945
# to get log base 10
# log10 likelihood of the quadratic model for parameters theta and data
# obsdata (columns x, y). theta = c(b0, alpha, b2, log10(ysig)): alpha
# is converted to the slope b1 via tan(), and log10(ysig) to ysig via
# 10^. dnorm(log=TRUE) returns the natural log, so divide by log(10)
# to get log base 10.
loglike.quadraticmodel <- function(theta, obsdata) {
  slope <- tan(theta[2])
  ysig  <- 10^theta[4]
  # Residuals of the quadratic model f(x) = b0 + b1*x + b2*x^2
  resid <- theta[1] + slope*obsdata$x + theta[3]*obsdata$x^2 - obsdata$y
  sum( dnorm(resid, mean=0, sd=ysig, log=TRUE) ) / log(10)
}

# Return log10(unnormalized prior)
# log10 of the (unnormalized) joint prior for
# theta = c(b0, alpha, b2, log10(ysig)).
# b0 ~ N(0, 10) and b2 ~ N(0, 5); alpha and log10(ysig) are given
# flat (improper) priors with constant density 1, i.e. log10 term 0.
logprior.quadraticmodel <- function(theta) {
  log10(dnorm(theta[1], mean=0, sd=10)) +
    log10(1) +                            # flat prior on alpha
    log10(dnorm(theta[3], mean=0, sd=5)) +
    log10(1)                              # flat prior on log10(ysig)
}
library(gplots) 
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
### Define reference model parameters and load the experimental data.
### NOTE(review): this section was adapted from a simulation demo; the
### data below are the real measurements, and the "true" values are
### only used as reference lines in the posterior plots.

set.seed(57)
Ndat <- 20        # NOTE(review): unused leftover from the simulation demo
xrange <- c(0,10) # NOTE(review): unused leftover from the simulation demo
sigTrue <- 2      # reference noise sd, shown as log10(sigTrue) in the plots
modMat <- c(0.69295, -0.48641, -0.76994) # reference (b0, b1, b2)

# Observed source positions (mm) and peak heights (arbitrary units)
x.in <- c(2.44, 3.49, 3.78, 3.31, 3.18, 3.15, 3.1, 3.0, 3.6, 3.4)
y.in <- c(129, 464, 189, 562, 589, 598, 606, 562, 360, 494)
# Standardize both variables (zero mean, unit sd) so the quadratic
# coefficients are O(1) and the fixed MCMC step sizes are reasonable
x <- (x.in - mean(x.in))/sd(x.in)
y <- (y.in - mean(y.in))/sd(y.in)

plot(x, y,
     col = "red3",
     main = "Zeeman data")

# Reference parameters, transformed into the sampling parameterization
# theta = c(b0, alpha=atan(b1), b2, log10(ysig)) used below
thetaTrue <- c(modMat[1], atan(modMat[2]), modMat[3], log10(sigTrue))
obsdata <- data.frame(cbind(x,y)) # columns must be named "x" and "y"

### Define model and infer the posterior PDF over its parameters

# Model to infer: quadratic regression with Gaussian noise
# Parameters: intercept b_0, gradient b_1, quadratic term b_2; Gaussian noise sigma, ysig.
# MCMC works on: theta=c(b_0, alpha=tan(b_1), b_2, log10(ysig)), a 1x4 vector.
# Prior PDFs (matching logprior.quadraticmodel):
# b_0:         N(mean=0, sd=10)
# alpha:       Uniform (improper, constant density)
# b_2:         N(mean=0, sd=5)
# log10(ysig): Uniform (improper)

# Covariance matrix of the Gaussian MCMC proposal; the diagonal entries
# are the squared step sizes for the four parameters
sampleCov <- diag(c(0.1, 0.01, 0.01, 0.01)^2)
#lsfit <- lm(y ~ x + I(x^2), data=obsdata)
#summary(lsfit)
# Starting point of the chain: (b0, alpha, b2, log10(ysig))
thetaInit <- c(-1, atan(-0.5), -1, log10(2.4))
# Run the MCMC to find postSamp, samples of the posterior PDF
set.seed(250)
allSamp <- metrop(func=logpost.quadraticmodel, thetaInit=thetaInit, Nburnin=2e4, Nsamp=2e5,
                   sampleCov=sampleCov, verbose=1e3, obsdata=obsdata)
##   1000 of  20000 +  2e+05  0.9169 
##   2000 of  20000 +  2e+05  0.8290 
##   3000 of  20000 +  2e+05  0.6824 
##   4000 of  20000 +  2e+05  0.5775 
##   5000 of  20000 +  2e+05  0.5201 
##   6000 of  20000 +  2e+05  0.4829 
##   7000 of  20000 +  2e+05  0.4597 
##   8000 of  20000 +  2e+05  0.4327 
##   9000 of  20000 +  2e+05  0.4112 
##  10000 of  20000 +  2e+05  0.4008 
##  11000 of  20000 +  2e+05  0.3951 
##  12000 of  20000 +  2e+05  0.3799 
##  13000 of  20000 +  2e+05  0.3751 
##  14000 of  20000 +  2e+05  0.3685 
##  15000 of  20000 +  2e+05  0.3580 
##  16000 of  20000 +  2e+05  0.3494 
##  17000 of  20000 +  2e+05  0.3417 
##  18000 of  20000 +  2e+05  0.3439 
##  19000 of  20000 +  2e+05  0.3418 
##  20000 of  20000 +  2e+05  0.3345 
##  21000 of  20000 +  2e+05  0.3333 
##  22000 of  20000 +  2e+05  0.3314 
##  23000 of  20000 +  2e+05  0.3301 
##  24000 of  20000 +  2e+05  0.3265 
##  25000 of  20000 +  2e+05  0.3233 
##  26000 of  20000 +  2e+05  0.3227 
##  27000 of  20000 +  2e+05  0.3219 
##  28000 of  20000 +  2e+05  0.3180 
##  29000 of  20000 +  2e+05  0.3175 
##  30000 of  20000 +  2e+05  0.3154 
##  31000 of  20000 +  2e+05  0.3191 
##  32000 of  20000 +  2e+05  0.3268 
##  33000 of  20000 +  2e+05  0.3307 
##  34000 of  20000 +  2e+05  0.3327 
##  35000 of  20000 +  2e+05  0.3367 
##  36000 of  20000 +  2e+05  0.3388 
##  37000 of  20000 +  2e+05  0.3378 
##  38000 of  20000 +  2e+05  0.3367 
##  39000 of  20000 +  2e+05  0.3348 
##  40000 of  20000 +  2e+05  0.3326 
##  41000 of  20000 +  2e+05  0.3323 
##  42000 of  20000 +  2e+05  0.3320 
##  43000 of  20000 +  2e+05  0.3358 
##  44000 of  20000 +  2e+05  0.3373 
##  45000 of  20000 +  2e+05  0.3363 
##  46000 of  20000 +  2e+05  0.3351 
##  47000 of  20000 +  2e+05  0.3351 
##  48000 of  20000 +  2e+05  0.3333 
##  49000 of  20000 +  2e+05  0.3322 
##  50000 of  20000 +  2e+05  0.3318 
##  51000 of  20000 +  2e+05  0.3324 
##  52000 of  20000 +  2e+05  0.3313 
##  53000 of  20000 +  2e+05  0.3292 
##  54000 of  20000 +  2e+05  0.3273 
##  55000 of  20000 +  2e+05  0.3256 
##  56000 of  20000 +  2e+05  0.3260 
##  57000 of  20000 +  2e+05  0.3269 
##  58000 of  20000 +  2e+05  0.3265 
##  59000 of  20000 +  2e+05  0.3247 
##  60000 of  20000 +  2e+05  0.3233 
##  61000 of  20000 +  2e+05  0.3233 
##  62000 of  20000 +  2e+05  0.3228 
##  63000 of  20000 +  2e+05  0.3217 
##  64000 of  20000 +  2e+05  0.3204 
##  65000 of  20000 +  2e+05  0.3205 
##  66000 of  20000 +  2e+05  0.3205 
##  67000 of  20000 +  2e+05  0.3198 
##  68000 of  20000 +  2e+05  0.3184 
##  69000 of  20000 +  2e+05  0.3199 
##  70000 of  20000 +  2e+05  0.3197 
##  71000 of  20000 +  2e+05  0.3189 
##  72000 of  20000 +  2e+05  0.3188 
##  73000 of  20000 +  2e+05  0.3178 
##  74000 of  20000 +  2e+05  0.3171 
##  75000 of  20000 +  2e+05  0.3160 
##  76000 of  20000 +  2e+05  0.3167 
##  77000 of  20000 +  2e+05  0.3157 
##  78000 of  20000 +  2e+05  0.3138 
##  79000 of  20000 +  2e+05  0.3123 
##  80000 of  20000 +  2e+05  0.3111 
##  81000 of  20000 +  2e+05  0.3104 
##  82000 of  20000 +  2e+05  0.3093 
##  83000 of  20000 +  2e+05  0.3090 
##  84000 of  20000 +  2e+05  0.3089 
##  85000 of  20000 +  2e+05  0.3088 
##  86000 of  20000 +  2e+05  0.3074 
##  87000 of  20000 +  2e+05  0.3063 
##  88000 of  20000 +  2e+05  0.3061 
##  89000 of  20000 +  2e+05  0.3054 
##  90000 of  20000 +  2e+05  0.3046 
##  91000 of  20000 +  2e+05  0.3062 
##  92000 of  20000 +  2e+05  0.3073 
##  93000 of  20000 +  2e+05  0.3083 
##  94000 of  20000 +  2e+05  0.3088 
##  95000 of  20000 +  2e+05  0.3091 
##  96000 of  20000 +  2e+05  0.3088 
##  97000 of  20000 +  2e+05  0.3092 
##  98000 of  20000 +  2e+05  0.3096 
##  99000 of  20000 +  2e+05  0.3090 
## 100000 of  20000 +  2e+05  0.3098 
## 101000 of  20000 +  2e+05  0.3094 
## 102000 of  20000 +  2e+05  0.3086 
## 103000 of  20000 +  2e+05  0.3085 
## 104000 of  20000 +  2e+05  0.3083 
## 105000 of  20000 +  2e+05  0.3085 
## 106000 of  20000 +  2e+05  0.3102 
## 107000 of  20000 +  2e+05  0.3103 
## 108000 of  20000 +  2e+05  0.3098 
## 109000 of  20000 +  2e+05  0.3094 
## 110000 of  20000 +  2e+05  0.3095 
## 111000 of  20000 +  2e+05  0.3094 
## 112000 of  20000 +  2e+05  0.3092 
## 113000 of  20000 +  2e+05  0.3087 
## 114000 of  20000 +  2e+05  0.3083 
## 115000 of  20000 +  2e+05  0.3076 
## 116000 of  20000 +  2e+05  0.3073 
## 117000 of  20000 +  2e+05  0.3070 
## 118000 of  20000 +  2e+05  0.3071 
## 119000 of  20000 +  2e+05  0.3063 
## 120000 of  20000 +  2e+05  0.3055 
## 121000 of  20000 +  2e+05  0.3042 
## 122000 of  20000 +  2e+05  0.3032 
## 123000 of  20000 +  2e+05  0.3023 
## 124000 of  20000 +  2e+05  0.3016 
## 125000 of  20000 +  2e+05  0.3013 
## 126000 of  20000 +  2e+05  0.3021 
## 127000 of  20000 +  2e+05  0.3023 
## 128000 of  20000 +  2e+05  0.3020 
## 129000 of  20000 +  2e+05  0.3020 
## 130000 of  20000 +  2e+05  0.3017 
## 131000 of  20000 +  2e+05  0.3014 
## 132000 of  20000 +  2e+05  0.3010 
## 133000 of  20000 +  2e+05  0.3011 
## 134000 of  20000 +  2e+05  0.3020 
## 135000 of  20000 +  2e+05  0.3030 
## 136000 of  20000 +  2e+05  0.3026 
## 137000 of  20000 +  2e+05  0.3028 
## 138000 of  20000 +  2e+05  0.3027 
## 139000 of  20000 +  2e+05  0.3032 
## 140000 of  20000 +  2e+05  0.3031 
## 141000 of  20000 +  2e+05  0.3027 
## 142000 of  20000 +  2e+05  0.3022 
## 143000 of  20000 +  2e+05  0.3020 
## 144000 of  20000 +  2e+05  0.3020 
## 145000 of  20000 +  2e+05  0.3017 
## 146000 of  20000 +  2e+05  0.3013 
## 147000 of  20000 +  2e+05  0.3010 
## 148000 of  20000 +  2e+05  0.3009 
## 149000 of  20000 +  2e+05  0.3017 
## 150000 of  20000 +  2e+05  0.3028 
## 151000 of  20000 +  2e+05  0.3041 
## 152000 of  20000 +  2e+05  0.3047 
## 153000 of  20000 +  2e+05  0.3051 
## 154000 of  20000 +  2e+05  0.3048 
## 155000 of  20000 +  2e+05  0.3040 
## 156000 of  20000 +  2e+05  0.3037 
## 157000 of  20000 +  2e+05  0.3030 
## 158000 of  20000 +  2e+05  0.3023 
## 159000 of  20000 +  2e+05  0.3021 
## 160000 of  20000 +  2e+05  0.3026 
## 161000 of  20000 +  2e+05  0.3030 
## 162000 of  20000 +  2e+05  0.3032 
## 163000 of  20000 +  2e+05  0.3026 
## 164000 of  20000 +  2e+05  0.3028 
## 165000 of  20000 +  2e+05  0.3031 
## 166000 of  20000 +  2e+05  0.3038 
## 167000 of  20000 +  2e+05  0.3041 
## 168000 of  20000 +  2e+05  0.3038 
## 169000 of  20000 +  2e+05  0.3030 
## 170000 of  20000 +  2e+05  0.3026 
## 171000 of  20000 +  2e+05  0.3021 
## 172000 of  20000 +  2e+05  0.3018 
## 173000 of  20000 +  2e+05  0.3015 
## 174000 of  20000 +  2e+05  0.3014 
## 175000 of  20000 +  2e+05  0.3015 
## 176000 of  20000 +  2e+05  0.3009 
## 177000 of  20000 +  2e+05  0.3006 
## 178000 of  20000 +  2e+05  0.3001 
## 179000 of  20000 +  2e+05  0.2997 
## 180000 of  20000 +  2e+05  0.2996 
## 181000 of  20000 +  2e+05  0.2998 
## 182000 of  20000 +  2e+05  0.3004 
## 183000 of  20000 +  2e+05  0.3002 
## 184000 of  20000 +  2e+05  0.2999 
## 185000 of  20000 +  2e+05  0.2996 
## 186000 of  20000 +  2e+05  0.2994 
## 187000 of  20000 +  2e+05  0.2992 
## 188000 of  20000 +  2e+05  0.2986 
## 189000 of  20000 +  2e+05  0.2981 
## 190000 of  20000 +  2e+05  0.2983 
## 191000 of  20000 +  2e+05  0.2995 
## 192000 of  20000 +  2e+05  0.2996 
## 193000 of  20000 +  2e+05  0.3000 
## 194000 of  20000 +  2e+05  0.2996 
## 195000 of  20000 +  2e+05  0.2998 
## 196000 of  20000 +  2e+05  0.3004 
## 197000 of  20000 +  2e+05  0.3011 
## 198000 of  20000 +  2e+05  0.3017 
## 199000 of  20000 +  2e+05  0.3024 
## 200000 of  20000 +  2e+05  0.3026 
## 201000 of  20000 +  2e+05  0.3039 
## 202000 of  20000 +  2e+05  0.3041 
## 203000 of  20000 +  2e+05  0.3044 
## 204000 of  20000 +  2e+05  0.3044 
## 205000 of  20000 +  2e+05  0.3043 
## 206000 of  20000 +  2e+05  0.3041 
## 207000 of  20000 +  2e+05  0.3038 
## 208000 of  20000 +  2e+05  0.3040 
## 209000 of  20000 +  2e+05  0.3042 
## 210000 of  20000 +  2e+05  0.3045 
## 211000 of  20000 +  2e+05  0.3045 
## 212000 of  20000 +  2e+05  0.3045 
## 213000 of  20000 +  2e+05  0.3048 
## 214000 of  20000 +  2e+05  0.3050 
## 215000 of  20000 +  2e+05  0.3049 
## 216000 of  20000 +  2e+05  0.3053 
## 217000 of  20000 +  2e+05  0.3060 
## 218000 of  20000 +  2e+05  0.3062 
## 219000 of  20000 +  2e+05  0.3057 
## 220000 of  20000 +  2e+05  0.3051
# 10^(allSamp[,1]+allSamp[,2]) is the unnormalized posterior at each sample
# Thin the chain by a factor of 100 to reduce the autocorrelation
# between the kept samples before plotting and density estimation
thinSel  <- seq(from=1, to=nrow(allSamp), by=100) 
postSamp <- allSamp[thinSel,]
# Plot MCMC chains and use density estimation to plot 1D posterior PDFs from these.
# Note that we don't need to do any explicit marginalization to get the 1D PDFs.
par(mfrow=c(4,2), mar=c(3.0,3.5,0.5,0.5), oma=0.5*c(1,1,1,1), mgp=c(1.8,0.6,0), cex=0.9)
parnames <- c(expression(b[0]), expression(paste(alpha, " / rad")), expression(b[2]), 
              expression(paste(log, " ", sigma)))
for(j in 3:6) { # columns of postSamp holding the four parameters
  # Left panel: trace of the thinned chain for parameter j-2
  plot(1:nrow(postSamp), postSamp[,j], 
       type="l", 
       xlab="iteration", 
       ylab=parnames[j-2])
  # Right panel: kernel density estimate of the 1D marginal posterior
  postDen <- density(postSamp[,j], n=2^10)
  plot(postDen$x, postDen$y, 
       type="l",
       lwd=1.5,
       yaxs="i", 
       ylim=1.05*c(0,max(postDen$y)),
       xlab=parnames[j-2], 
       ylab="density")
  abline(v=thetaTrue[j-2], lwd=1.5, lty=3) # reference value
}

# Plot all pairwise 2D scatter plots of the parameter samples
# (redundant lower-triangle panels are left blank)
par(mfcol=c(3,3), mar=c(3.5,3.5,0.5,0.5), oma=c(0.1,0.1,0.1,0.5), mgp=c(2.0,0.8,0))
for(i in 1:3) {
  for(j in 2:4) {
    if(j<=i) {
        plot.new() # empty panel keeps the 3x3 grid layout aligned
      } else {
        # parameters are in columns 3:6 of postSamp, hence the +2 offset
        plot(postSamp[,i+2], postSamp[,j+2], 
             xlab=parnames[i], 
             ylab=parnames[j], 
             pch=".")
    }
  }
}

# Find MAP and mean solutions.
# MAP = Maximum A Posteriori, i.e. peak of posterior.
# MAP is not the peak in each 1D PDF, but the peak of the 4D PDF.
# mean is easy, because samples have been drawn from the (unnormalized) posterior.
# Columns 1 and 2 hold the log10 prior and log10 likelihood, so their
# sum is the log10 (unnormalized) posterior at each sample.
posMAP    <- which.max(postSamp[,1]+postSamp[,2]) 
thetaMAP  <- postSamp[posMAP, 3:6]
thetaMean <- apply(postSamp[,3:6], 2, mean) # Monte Carlo integration

# Overplot the MAP solution with the original data.
# Error bars show the MAP noise estimate ysig = 10^thetaMAP[4].
par(mfrow=c(1,1), mar=c(3.5,3.5,2,1), oma=0.1*c(2,1,1,1), mgp=c(2.0,0.8,0), cex=1.0)
plotCI(obsdata$x, obsdata$y, 
       xlab="x",
       ylab="y", 
       uiw=10^thetaMAP[4],
       gap=0,
       col="royalblue",
       main="Quadratic fit of the Zeeman data")
# Evaluate the MAP model f(x) = b0 + tan(alpha)*x + b2*x^2 on a grid
# covering the (standardized) data range.
xsamp <- seq(from=-3, to=2, length.out=500)
# BUG FIX: the original first computed ysamp from the unrelated
# reference parameters modMat and immediately overwrote it on the next
# line; that dead assignment has been removed.
ysamp <- cbind(1,xsamp,xsamp^2) %*% as.matrix(c(thetaMAP[1], tan(thetaMAP[2]), thetaMAP[3]))
lines(xsamp, drop(ysamp), 
      lwd=1.5,
      col="red3") # MAP model
legend("topright", c("Data", "Fit"), col=c("royalblue", "red3"), lwd=3)

As can be seen from our data, the students forgot to take measurements in the region \(x \in (2.44, 3.0)\).

  2. Run a Markov Chain Monte Carlo to predict peak height measurements at \(x_1 = 2.8\) mm and \(x_2 = 2.6\) mm.
# Predict peak heights at x = 2.8 mm and x = 2.6 mm by evaluating the
# MAP quadratic model at the standardized positions.
# NOTE(review): these are point predictions from thetaMAP; they are not
# posterior-predictive draws from the MCMC chain.
x2.8 <- (2.8 - mean(x.in))/sd(x.in)
y2.8 <- thetaMAP[1] + x2.8*tan(thetaMAP[2]) + (x2.8^2)*thetaMAP[3] 

x2.6 <- (2.6 - mean(x.in))/sd(x.in)
y2.6 <- thetaMAP[1] + x2.6*tan(thetaMAP[2]) + (x2.6^2)*thetaMAP[3]
  
# Show the standardized data together with the two predicted points and
# the fitted curve (xsamp/ysamp computed in the previous section)
plot(x, y, 
     col="royalblue",
     xlab="x", 
     ylab="y", 
     main="Zeeman data + predicted data")
points(x2.8, y2.8, col="red3")
points(x2.6, y2.6, col="red3")
lines(xsamp, drop(ysamp), 
      lwd=1.5,
      lty=3,
      col="slategray3") 
legend("topright", c("Data", "Predictions"), col=c("royalblue", "red3"), lwd=3)

Exercise 2

The number of British coal mine disasters has been recorded from 1851 to 1962. By looking at the data it seems that the number of incidents decreased towards the end of the sampling period. We model the data as follows:

# British coal mine disaster counts per year, 1851-1962 (N = 112 years).
# BUG FIX: the assignment operators below used the Unicode minus sign
# ("<−", U+2212), which is a syntax error in R; replaced with ASCII "<-".
data <- NULL
data$D <- c(4,5,4,1,0,4,3,4,0,6,3,3,4,0,2,6,3,3,5,4,5,3,
            1,4,4,1,5,5,3,4,2,5,2,2,3,4,2,1,3,2,1,1,1,1,
            1,3,0,0,1,0,1,1,0,0,3,1,0,3,2,2,0,1,1,1,0,1,
            0,1,0,0,0,2,1,0,0,0,1,1,0,2,2,3,1,1,2,1,1,1,
            1,2,4,2,0,0,0,1,4,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0)
data$N <- 112
library(rjags)
## Loading required package: coda
## Linked to JAGS 4.3.0
## Loaded modules: basemod,bugs
library(coda)

# Coal mine disaster counts per year, 1851-1962 (112 observations)
data<- NULL

data$D <- c(4,5,4,1,0,4,3,4,0,6,3,3,4,0,2,6,3,3,5,4,5,3,
            1,4,4,1,5,5,3,4,2,5,2,2,3,4,2,1,3,2,1,1,1,1,
            1,3,0,0,1,0,1,1,0,0,3,1,0,3,2,2,0,1,1,1,0,1,
            0,1,0,0,0,2,1,0,0,0,1,1,0,2,2,3,1,1,2,1,1,1,
            1,2,4,2,0,0,0,1,4,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0)

data$N <- 112

# JAGS model definition is read from the external file "model.bug".
# NOTE(review): the model file is not shown here; from the monitored
# nodes it presumably describes a change-point Poisson model with rate
# parameters b0, b1 and change-point tau -- confirm against model.bug.
model<- "model.bug"
# Initial values for the sampler
init <- NULL
init$b0 <- 0
init$b1 <- 0
init$tau <- 50
jm <- jags.model(file = model, data = data, inits = init)
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 112
##    Unobserved stochastic nodes: 115
##    Total graph size: 901
## 
## Initializing model
# Draw 10000 posterior samples of b0, b1 and tau (no thinning)
chain <- coda.samples(jm , c("b0", "b1","tau"), n.iter=10000)
summary(chain)
## 
## Iterations = 1001:11000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##       Mean      SD  Naive SE Time-series SE
## b0   1.137 0.09328 0.0009328       0.001826
## b1  -1.258 0.15138 0.0015138       0.002721
## tau 39.753 2.08833 0.0208833       0.057518
## 
## 2. Quantiles for each variable:
## 
##        2.5%    25%    50%    75%   97.5%
## b0   0.9475  1.076  1.139  1.201  1.3153
## b1  -1.5540 -1.358 -1.258 -1.153 -0.9672
## tau 36.1312 38.267 40.007 40.849 44.2017
plot(chain, col="red3") # trace and marginal density plots

# Convert the chain to a data frame and inspect parameter correlations
chain.df <- as.data.frame(as.mcmc(chain)) 
cat(sprintf("\n Correlation matrix: \n")) 
## 
##  Correlation matrix:
print(cor(chain.df))
##             b0          b1         tau
## b0   1.0000000 -0.56907254 -0.25256520
## b1  -0.5690725  1.00000000 -0.01588095
## tau -0.2525652 -0.01588095  1.00000000
# Study the effect of thinning on the autocorrelation of the chains.
# BUG FIX: the thinning steps were stored in a variable named `t`,
# which shadows base::t(); renamed to thin.steps.
thin.steps <- c(5, 10, 20, 40, 80)

for(i in seq_along(thin.steps)) {
    # Draw new samples, keeping every thin.steps[i]-th iteration
    chain <- coda.samples(jm, c("b0", "b1","tau"), n.iter = 10000, thin = thin.steps[i])
    chain.df <- as.data.frame(as.mcmc(chain))
    
    b0.chain  <- as.mcmc(chain.df["b0"])
    b1.chain  <- as.mcmc(chain.df["b1"])
    tau.chain <- as.mcmc(chain.df["tau"])
    
    # Autocorrelation of each (thinned) chain at lags 0..30
    my.lags <- seq(0, 30)
    y1.b0  <- autocorr(b0.chain, lags=my.lags)
    y1.b1  <- autocorr(b1.chain, lags=my.lags)
    y1.tau <- autocorr(tau.chain, lags=my.lags)
    
    par(mfrow=c(3, 1))
    
    plot(my.lags, y1.b0,
         col="red3",
         xlab="lag", 
         ylab="ACF", 
         main=paste("b0, thinning= ",thin.steps[i]))
    
    plot(my.lags, y1.b1, 
         col="red3",
         xlab="lag", 
         ylab="ACF", 
         main=paste("b1, thinning= ",thin.steps[i]))

    plot(my.lags, y1.tau, 
         col="red3",
         xlab="lag", 
         ylab="ACF", 
         main=paste("tau, thinning= ",thin.steps[i]))
    
    plot(chain, col="red3") # trace and density plots of the thinned chain
    
    print(summary(chain))
}

## 
## Iterations = 11005:21000
## Thinning interval = 5 
## Number of chains = 1 
## Sample size per chain = 2000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##       Mean     SD Naive SE Time-series SE
## b0   1.140 0.0924 0.002066       0.002305
## b1  -1.264 0.1526 0.003411       0.003411
## tau 39.738 2.2186 0.049609       0.060283
## 
## 2. Quantiles for each variable:
## 
##        2.5%    25%    50%    75%  97.5%
## b0   0.9614  1.079  1.141  1.203  1.321
## b1  -1.5648 -1.366 -1.265 -1.163 -0.963
## tau 36.1692 37.919 39.926 40.894 45.307

## 
## Iterations = 21010:31000
## Thinning interval = 10 
## Number of chains = 1 
## Sample size per chain = 1000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##       Mean      SD Naive SE Time-series SE
## b0   1.142 0.09289 0.002937       0.002790
## b1  -1.268 0.15435 0.004881       0.004881
## tau 39.715 2.05123 0.064866       0.068830
## 
## 2. Quantiles for each variable:
## 
##        2.5%    25%    50%    75%   97.5%
## b0   0.9574  1.082  1.142  1.206  1.3282
## b1  -1.5740 -1.375 -1.261 -1.162 -0.9837
## tau 36.1176 38.229 39.973 40.816 43.8091

## 
## Iterations = 31020:41000
## Thinning interval = 20 
## Number of chains = 1 
## Sample size per chain = 500 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##       Mean      SD Naive SE Time-series SE
## b0   1.141 0.09623 0.004304       0.004304
## b1  -1.254 0.15978 0.007145       0.007145
## tau 39.659 2.20594 0.098652       0.098652
## 
## 2. Quantiles for each variable:
## 
##        2.5%    25%    50%    75%   97.5%
## b0   0.9485  1.079  1.145  1.210  1.3198
## b1  -1.5586 -1.367 -1.249 -1.141 -0.9602
## tau 36.0586 38.072 39.856 40.784 45.9968

## 
## Iterations = 41040:51000
## Thinning interval = 40 
## Number of chains = 1 
## Sample size per chain = 250 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##       Mean      SD Naive SE Time-series SE
## b0   1.148 0.09683 0.006124       0.006124
## b1  -1.251 0.15735 0.009952       0.008337
## tau 39.853 2.28422 0.144467       0.144467
## 
## 2. Quantiles for each variable:
## 
##        2.5%    25%    50%    75%   97.5%
## b0   0.9527  1.083  1.145  1.216  1.3363
## b1  -1.5478 -1.352 -1.243 -1.155 -0.9411
## tau 36.1397 38.267 39.966 40.941 45.1950

## 
## Iterations = 51080:61000
## Thinning interval = 80 
## Number of chains = 1 
## Sample size per chain = 125 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##       Mean      SD Naive SE Time-series SE
## b0   1.135 0.09765 0.008734       0.008734
## b1  -1.268 0.14608 0.013066       0.013066
## tau 39.628 2.08635 0.186608       0.166540
## 
## 2. Quantiles for each variable:
## 
##        2.5%    25%    50%    75%   97.5%
## b0   0.9548  1.074  1.137  1.208  1.3144
## b1  -1.5400 -1.357 -1.267 -1.183 -0.9415
## tau 35.8136 37.929 40.011 40.899 43.4112
# Study the effect of discarding different burn-in lengths.
# BUG FIX: the original loop iterated over burn.in but never used
# burn.in[i]; the burn-in is now applied by windowing the chain before
# analysis, so each pass discards a different number of initial samples.
burn.in <- c(500, 1000, 2000, 4000)

for(i in seq_along(burn.in)) {
    chain <- coda.samples(jm, c("b0", "b1","tau"), n.iter = 10000)
    # Drop the first burn.in[i] iterations as burn-in
    chain.kept <- window(chain, start = start(chain) + burn.in[i])
    
    par(mfrow=c(1,1), fin=c(2,2))
    plot(chain.kept, col="red3")
    chain.df <- as.data.frame(as.mcmc(chain.kept))

    par(mfrow=c(3,1), fin=c(4,4))
    
    # Pairwise scatter plots of the retained samples
    plot(chain.df$b0, chain.df$b1, 
         xlab="b0", 
         ylab="b1", 
         main="b1 vs b0", 
         col="red3")
    
    plot(chain.df$b0, chain.df$tau,
         xlab="b0", 
         ylab="tau",
         main="tau vs b0",
         col="red3")
    
    plot(chain.df$b1, chain.df$tau,
         xlab="b1", 
         ylab="tau", 
         main="tau vs b1",
         col="red3")
    
    print(summary(chain.kept))
}

## 
## Iterations = 61001:71000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##       Mean      SD  Naive SE Time-series SE
## b0   1.137 0.09263 0.0009263       0.001794
## b1  -1.257 0.15729 0.0015729       0.002905
## tau 39.718 2.27814 0.0227814       0.061998
## 
## 2. Quantiles for each variable:
## 
##        2.5%    25%    50%    75%   97.5%
## b0   0.9536  1.074  1.137  1.200  1.3160
## b1  -1.5659 -1.364 -1.255 -1.149 -0.9522
## tau 36.0733 37.917 39.886 40.846 46.1551

## 
## Iterations = 71001:81000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##       Mean     SD Naive SE Time-series SE
## b0   1.141 0.0938 0.000938       0.001793
## b1  -1.262 0.1576 0.001576       0.002897
## tau 39.625 2.1319 0.021319       0.050088
## 
## 2. Quantiles for each variable:
## 
##        2.5%    25%    50%    75%   97.5%
## b0   0.9513  1.079  1.143  1.205  1.3189
## b1  -1.5753 -1.366 -1.258 -1.157 -0.9534
## tau 36.1118 37.895 39.884 40.803 44.1252

## 
## Iterations = 81001:91000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##       Mean      SD  Naive SE Time-series SE
## b0   1.139 0.09248 0.0009248       0.001729
## b1  -1.257 0.15313 0.0015313       0.002684
## tau 39.667 2.16002 0.0216002       0.054823
## 
## 2. Quantiles for each variable:
## 
##        2.5%    25%    50%    75%   97.5%
## b0   0.9534  1.079  1.141  1.202  1.3156
## b1  -1.5612 -1.359 -1.257 -1.153 -0.9603
## tau 36.0954 37.899 39.892 40.814 44.6459

## 
## Iterations = 91001:101000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 10000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##       Mean      SD  Naive SE Time-series SE
## b0   1.138 0.09331 0.0009331       0.001735
## b1  -1.263 0.15328 0.0015328       0.002868
## tau 39.842 2.24524 0.0224524       0.060069
## 
## 2. Quantiles for each variable:
## 
##        2.5%    25%   50%    75%   97.5%
## b0   0.9524  1.075  1.14  1.203  1.3141
## b1  -1.5616 -1.367 -1.26 -1.159 -0.9638
## tau 36.1577 38.204 40.03 40.878 46.2019